PCANet: Learning diffusivity (m) to solution (u) map for the Poisson problem¶
Data is located in the ../data directory, and the key data of interest is in the Poisson_samples.npz file.
On data¶
The Dropbox folder NeuralOperator_Survey_Shared_Data_March2025 contains the key data to reproduce the results in the survey paper.
If you did not generate data by running survey_work/problems/poisson/Poisson.ipynb, consider copying the contents of dropbox folder NeuralOperator_Survey_Shared_Data_March2025/survey_work/problems/poisson/data/ into survey_work/problems/poisson/data/ before running this notebook.
Results¶
Below shows the neural operator prediction for different samples of test input.
In [1]:
# Core dependencies; numpy is deliberately imported before torch (see inline note).
import sys
import os
import numpy as np # load numpy before torch
import torch
# Project sources live outside the notebook tree; extend sys.path so the
# plotting / data / network helper modules below can be imported.
src_path = "../../src/"
sys.path.append(src_path + 'plotting/')
from field_plot import field_plot
from plot_loss import plot_loss
from plot_svd import plot_s_vec_values
sys.path.append(src_path + 'data/')
from dataMethods import DataProcessor
sys.path.append(src_path + 'nn/pcanet/')
sys.path.append(src_path + 'nn/mlp/') # need this here so that PCANet can be imported (it imports MLP)
from torch_pcanet import PCANet
import uq
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'
# set seed
# Fix both numpy and torch RNGs so sample selection, HMC, and dropout
# sampling below are reproducible.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
from sklearn.metrics import roc_auc_score, roc_curve
In [2]:
# Directory holding the Poisson sample data; all results (figures, model
# checkpoints) are written alongside it.
data_folder = '../../../autodl-tmp/data/'
results_dir = data_folder
# exist_ok avoids the check-then-create race and matches the makedirs
# usage elsewhere in this notebook.
os.makedirs(results_dir, exist_ok=True)
Load data¶
In [3]:
# --- problem / discretization sizes ---
num_train = 3500
num_test = 1000
num_inp_fn_points = 2601  # grid points for the input function m
num_out_fn_points = 2601  # evaluation points for the output function u
num_Y_components = 1      # scalar output field
num_inp_red_dim = 100     # reduced (PCA) dimension for the input data
num_out_red_dim = 100     # reduced (PCA) dimension for the output data
out_coordinate_dimension = 2  # the output function lives on a 2D domain
# --- training hyperparameters ---
batch_size = 20
epochs = 1000
lr = 1.0e-3
act_fn = torch.relu
data_prefix = 'Poisson'
# Load the in-distribution samples and build the reduced-dimension datasets.
data = DataProcessor(data_folder + data_prefix + '_samples_no-ood.npz', num_train, num_test,
                     num_inp_fn_points, num_out_fn_points, num_Y_components,
                     num_inp_red_dim, num_out_red_dim)
train_data = dict(X_train=data.X_train, X_trunk=data.X_trunk, Y_train=data.Y_train)
test_data = dict(X_train=data.X_test, X_trunk=data.X_trunk, Y_train=data.Y_test)
# Report the shapes of everything we just loaded.
for label, arr in [('X_train', data.X_train), ('Y_train', data.Y_train),
                   ('X_test', data.X_test), ('Y_test', data.Y_test),
                   ('X_trunk', data.X_trunk),
                   ('X_train_svd_projector', data.X_train_svd_projector),
                   ('Y_train_svd_projector', data.Y_train_svd_projector)]:
    print(label + ':', arr.shape)
X_train: (3500, 100) Y_train: (3500, 100) X_test: (1000, 100) Y_test: (1000, 100) X_trunk: (2601, 2) X_train_svd_projector: (100, 2601) Y_train_svd_projector: (100, 2601)
Plot singular values¶
In [4]:
# Annotation layout for the singular-value plot — presumably inset position
# followed by the zoomed data region [x0, x1, y0, y1]; confirm against
# plot_s_vec_values.
plot_annot_xy = [0.15, 0.35, 0.8, 0.5]
plot_annot_xy_region = [-5, 300, -0.04, 0.15]
# One pair of text offsets per annotated point.
xy_text_vec = [
    [(10, 20), (-20, -25)],
    [(5, -5), (-20, -25)],
    [(-20, 15), (-20, -25)],
]
l_style_vec = ['-', '-']
plot_s_vec_values(
    [data.X_train_s_values, data.Y_train_s_values],
    [num_inp_red_dim, num_out_red_dim],
    ['m', 'u'],
    l_style_vec,
    xy_text_vec,
    plot_annot_xy,
    plot_annot_xy_region,
    results_dir + data_prefix + '_svd_analysis_m_u',
)
j = 0, i = 0, index = 100, index_val = 0.032590066211883206 j = 0, i = 1, index = 100, index_val = 0.0033409165710523744 j = 1, i = 0, index = 35, index_val = 0.1006803895025745 j = 1, i = 1, index = 10, index_val = 0.10902619155367585 j = 2, i = 0, index = 289, index_val = 0.01000805121239042 j = 2, i = 1, index = 50, index_val = 0.009894120146498975
Create model and train the network¶
In [5]:
# PCANet: an MLP mapping reduced input coefficients (m) to reduced output
# coefficients (u).
num_layers = 4
num_neurons = 250
model_save_path = results_dir + 'PCANet/'
model_save_file = model_save_path + 'model.pkl'
# model_save_path already names the checkpoint directory; the previous
# os.path.dirname(...) call only stripped its trailing slash.
os.makedirs(model_save_path, exist_ok=True)
model = PCANet(num_layers, num_neurons, act_fn,
               num_inp_red_dim, num_out_red_dim,
               save_file=model_save_file)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Number of trainable parameters: {}'.format(trainable_params))
Using device: cuda Number of trainable parameters: 175850
In [6]:
# Bundle the data statistics and all run settings into one metadata dict and
# attach it to the model so it is pickled together with the weights.
data_to_save = data.get_data_to_save()
model_metadata = {
    'data': data_to_save,
    'num_train': num_train,
    'num_test': num_test,
    'num_inp_fn_points': num_inp_fn_points,
    'num_out_fn_points': num_out_fn_points,
    'num_Y_components': num_Y_components,
    'out_coordinate_dimension': out_coordinate_dimension,
    'num_inp_red_dim': num_inp_red_dim,
    'num_out_red_dim': num_out_red_dim,
    'num_layers': num_layers,
    'num_neurons': num_neurons,
    'epochs': epochs,
    'batch_size': batch_size,
    'lr': lr,
}
model.metadata = model_metadata
In [7]:
# Train
# NOTE: PCANet defines its own train(...) training loop — this is not
# nn.Module.train(mode). Per the log output, checkpoints are written every
# `save_epoch` epochs when save_model is True.
model.train(train_data, test_data, batch_size=batch_size, \
epochs = epochs, lr = lr, \
save_model = True, save_epoch = 100)
-------------------------------------------------- Starting training with 175850 trainable parameters... -------------------------------------------------- -------------------------------------------------- Epoch: 1, Train Loss (l2 squared): 5.530e+00, Test Loss (l2 squared): 1.641e+00, Time (sec): 0.356 -------------------------------------------------- -------------------------------------------------- Epoch: 100, Train Loss (l2 squared): 1.245e-01, Test Loss (l2 squared): 2.648e-01, Time (sec): 0.259 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 100 -------------------------------------------------- -------------------------------------------------- Epoch: 200, Train Loss (l2 squared): 6.100e-02, Test Loss (l2 squared): 2.165e-01, Time (sec): 0.207 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 200 -------------------------------------------------- -------------------------------------------------- Epoch: 300, Train Loss (l2 squared): 4.109e-02, Test Loss (l2 squared): 2.039e-01, Time (sec): 0.242 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 300 -------------------------------------------------- -------------------------------------------------- Epoch: 400, Train Loss (l2 squared): 3.341e-02, Test Loss (l2 squared): 2.023e-01, Time (sec): 0.152 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 400 -------------------------------------------------- -------------------------------------------------- Epoch: 500, Train Loss (l2 squared): 2.995e-02, Test Loss (l2 squared): 2.025e-01, Time (sec): 0.194 -------------------------------------------------- -------------------------------------------------- Model parameters saved 
at epoch 500 -------------------------------------------------- -------------------------------------------------- Epoch: 600, Train Loss (l2 squared): 2.836e-02, Test Loss (l2 squared): 2.028e-01, Time (sec): 0.252 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 600 -------------------------------------------------- -------------------------------------------------- Epoch: 700, Train Loss (l2 squared): 2.760e-02, Test Loss (l2 squared): 2.031e-01, Time (sec): 0.211 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 700 -------------------------------------------------- -------------------------------------------------- Epoch: 800, Train Loss (l2 squared): 2.717e-02, Test Loss (l2 squared): 2.033e-01, Time (sec): 0.291 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 800 -------------------------------------------------- -------------------------------------------------- Epoch: 900, Train Loss (l2 squared): 2.695e-02, Test Loss (l2 squared): 2.034e-01, Time (sec): 0.219 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 900 -------------------------------------------------- -------------------------------------------------- Epoch: 1000, Train Loss (l2 squared): 2.684e-02, Test Loss (l2 squared): 2.035e-01, Time (sec): 0.202 -------------------------------------------------- -------------------------------------------------- Model parameters saved at epoch 1000 -------------------------------------------------- -------------------------------------------------- Train time: 224.328, Epochs: 1000, Batch Size: 20, Final Train Loss (l2 squared): 2.684e-02, Final Test Loss (l2 squared): 2.035e-01 
--------------------------------------------------
In [8]:
## Plotting the loss history
# Column 0 of the train/test loss logs is passed to the plotter — presumably
# the loss value itself; confirm against plot_loss / PCANet's logging.
plot_loss( model.train_loss_log[:, 0], \
model.test_loss_log[:, 0], \
fs = 14, lw = 2, \
savefile = results_dir+'loss_his.png', \
figsize = [6,6])
Test and plot the output of network¶
In [9]:
# Reload the checkpointed model. The whole pickled object (not just a
# state_dict) was saved, hence weights_only=False.
model = torch.load(model_save_file, weights_only=False)
path_parts = model_save_file.split(os.path.sep)
print('-' * 50)
print('Model loaded from: {}'.format(path_parts[-2] + '/' + path_parts[-1]))
print('\n' + '-' * 50)
print('Model metadata:', model.metadata.keys())
print('\n' + '-' * 50)
print('Model:', model)
--------------------------------------------------
Model loaded from: PCANet/model.pkl
--------------------------------------------------
Model metadata: dict_keys(['data', 'num_train', 'num_test', 'num_inp_fn_points', 'num_out_fn_points', 'num_Y_components', 'out_coordinate_dimension', 'num_inp_red_dim', 'num_out_red_dim', 'num_layers', 'num_neurons', 'epochs', 'batch_size', 'lr'])
--------------------------------------------------
Model: PCANet(
(net): MLP(
(layers): ModuleList(
(0): Linear(in_features=100, out_features=250, bias=True)
(1-2): 2 x Linear(in_features=250, out_features=250, bias=True)
(3): Linear(in_features=250, out_features=100, bias=True)
)
)
)
In [10]:
# Per-sample relative l2 error of the predictions, computed in the reduced
# (PCA-coefficient) space.
Y_test = test_data['Y_train']
Y_test_pred = model.predict(test_data['X_train']).detach().cpu().numpy()
print('test_out shape: {}, test_pred shape: {}'.format(Y_test.shape, Y_test_pred.shape))
residual_norms = np.linalg.norm(Y_test - Y_test_pred, axis=1)
truth_norms = np.linalg.norm(Y_test, axis=1)
error = residual_norms / truth_norms
print('Num tests: {:5d}, Mean Loss (rel l2): {:.3e}, Std Loss (rel l2): {:.3e}'.format(num_test, np.mean(error), np.std(error)))
test_out shape: (1000, 100), test_pred shape: (1000, 100) Num tests: 1000, Mean Loss (rel l2): 1.060e-01, Std Loss (rel l2): 3.565e-02
In [11]:
def apply_dirichlet_bc(u, bc_value, bc_node_ids):
    """Overwrite u at the given node indices with bc_value, in place, and return u."""
    indices = np.asarray(bc_node_ids)
    u[indices] = bc_value
    return u
In [12]:
# Compare m, u_true, u_pred and their difference for a few random test samples,
# one sample per figure row.
rows, cols = 4, 4
fs = 20
fig, axs = plt.subplots(rows, cols, figsize=(16, 13))
decode = True                   # map reduced (PCA) coefficients back onto the grid
apply_dirichlet_bc_flag = True  # force the prediction to satisfy the boundary condition
# row: m, u_true, u_pred, u_diff
u_tags = [r'$m$', r'$u_{true}$', r'$u_{pred}$', r'$u_{true} - u_{pred}$']
cmaps = ['jet', 'viridis', 'viridis', 'hot']
nodes = data.X_trunk
# randomly choose `rows` test samples to display
i_choices = np.random.choice(num_test, rows, replace=False)
for i, i_plot in enumerate(i_choices):
    i_pred = Y_test_pred[i_plot]
    i_truth = Y_test[i_plot]
    i_m_test = data.X_test[i_plot]
    if decode:
        i_pred = data.decoder_Y(i_pred)
        i_truth = data.decoder_Y(i_truth)
        i_m_test = data.decoder_X(i_m_test)
    if apply_dirichlet_bc_flag:
        i_pred = apply_dirichlet_bc(i_pred, 0.0, data.u_mesh_dirichlet_boundary_nodes)
        # the ground truth should already satisfy the BC; warn if it does not
        bc_err = np.abs(i_truth[data.u_mesh_dirichlet_boundary_nodes]).max()
        if bc_err > 1.0e-9:
            print('Warning: Dirichlet BC not applied to i_truth. Err : {}'.format(bc_err))
    i_diff = i_pred - i_truth
    i_diff_norm = np.linalg.norm(i_diff) / np.linalg.norm(i_truth)
    print('i_plot = {:5d}, error (rel l2): {:.3e}'.format(i_plot, i_diff_norm))
    uvec = [i_m_test, i_truth, i_pred, i_diff]
    for j in range(cols):
        mappable = field_plot(axs[i, j], uvec[j], nodes, cmap=cmaps[j])
        divider = make_axes_locatable(axs[i, j])
        cax = divider.append_axes('right', size='8%', pad=0.03)
        cax.tick_params(labelsize=fs)
        if j in (0, cols - 1):
            # compact tick formatting for the m and difference columns
            kfmt = lambda x, pos: "{:g}".format(x)
            fig.colorbar(mappable, cax=cax, orientation='vertical', format=kfmt)
        else:
            fig.colorbar(mappable, cax=cax, orientation='vertical')
        if i == 0 and j < cols - 1:
            axs[i, j].set_title(u_tags[j], fontsize=fs)
        if j == cols - 1:
            # last column carries the per-sample error in its title
            err_str = 'err (rel l2): {:.3f}%'.format(i_diff_norm * 100)
            if i == 0:
                err_str = u_tags[j] + '\n' + err_str
            axs[i, j].set_title(err_str, fontsize=fs)
        axs[i, j].axis('off')
fig.tight_layout()
fig.suptitle('Poisson problem: Compare neural operator predictions ({})'.format(model.name), fontsize=1.25*fs, y=1.025)
fig.savefig(results_dir+'neural_operator_prediction_comparison.png', bbox_inches='tight')
plt.show()
i_plot = 801, error (rel l2): 7.305e-03 i_plot = 311, error (rel l2): 9.159e-03 i_plot = 85, error (rel l2): 7.269e-03 i_plot = 435, error (rel l2): 7.810e-03
HMC¶
In [7]:
model = torch.load(model_save_file, weights_only=False)
model.to(device)
# Use a subset of test data for HMC (full dataset may be too slow)
num_hmc_samples_data = 1000 # number of data points to use
hmc_indices = np.random.choice(num_test, num_hmc_samples_data, replace=False)
# For PCANet, we use the reduced-dimension inputs/outputs
x_hmc = test_data['X_train'][hmc_indices]
y_hmc = test_data['Y_train'][hmc_indices]
print(f"Using {num_hmc_samples_data} test samples for HMC")
print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")
# Initialize from current model parameters
flat0 = uq.pack_params(model).to(device)
print(f"Initial parameter vector shape: {flat0.shape}")
# Create log probability function
# Note: PCANet uses X -> Y mapping directly (no trunk network)
log_prob = uq.make_log_prob_fn(model, x_hmc, y_hmc,
noise_std=0.05, prior_std=1.0)
# Adaptive HMC settings
hmc_num_samples = 2000
hmc_burn_in = 200 # Increased burn-in for adaptation
hmc_adapt_steps = 150 # Steps to adapt step size
hmc_initial_step_size = 1e-7
hmc_leapfrog_steps = 20
hmc_target_accept = 0.75 # Target acceptance rate (65-80% is optimal)
print(f"\nAdaptive HMC Settings:")
print(f" num_samples: {hmc_num_samples}")
print(f" burn_in: {hmc_burn_in}")
print(f" adapt_steps: {hmc_adapt_steps}")
print(f" initial_step_size: {hmc_initial_step_size}")
print(f" leapfrog_steps: {hmc_leapfrog_steps}")
print(f" target_accept: {hmc_target_accept}")
print()
hmcsamples, acc_rate, final_step_size, step_size_history = uq.hmc_adaptive(
log_prob,
flat0.requires_grad_(True),
target_accept=hmc_target_accept,
initial_step_size=hmc_initial_step_size,
leapfrog_steps=hmc_leapfrog_steps,
num_samples=hmc_num_samples,
burn_in=hmc_burn_in,
adapt_steps=hmc_adapt_steps
)
print(f"\n{'='*60}")
print(f"Final Results:")
print(f" Acceptance rate: {acc_rate:.3f} ({acc_rate*100:.1f}%)")
print(f" Final step size: {final_step_size:.2e}")
print(f" Collected {len(hmcsamples)} samples")
print(f"{'='*60}")
Using 1000 test samples for HMC Model has 175850 parameters Initial parameter vector shape: torch.Size([175850]) Adaptive HMC Settings: num_samples: 2000 burn_in: 200 adapt_steps: 150 initial_step_size: 1e-07 leapfrog_steps: 20 target_accept: 0.75 Starting adaptive HMC with target acceptance rate: 75.00% Adaptation will run for 150 iterations Iter 50/2200: accept rate = 0.740, step_size = 1.38e-06, phase = adapting Iter 100/2200: accept rate = 0.710, step_size = 1.62e-06, phase = adapting Iter 150/2200: accept rate = 0.707, step_size = 6.19e-06, phase = adapting >>> Adaptation complete! Final step size: 2.60e-06 >>> Acceptance rate during adaptation: 0.702 Iter 200/2200: accept rate = 0.735, step_size = 2.60e-06, phase = burn-in Iter 250/2200: accept rate = 0.772, step_size = 2.60e-06, phase = sampling Iter 300/2200: accept rate = 0.787, step_size = 2.60e-06, phase = sampling Iter 350/2200: accept rate = 0.791, step_size = 2.60e-06, phase = sampling Iter 400/2200: accept rate = 0.797, step_size = 2.60e-06, phase = sampling Iter 450/2200: accept rate = 0.809, step_size = 2.60e-06, phase = sampling Iter 500/2200: accept rate = 0.806, step_size = 2.60e-06, phase = sampling Iter 550/2200: accept rate = 0.811, step_size = 2.60e-06, phase = sampling Iter 600/2200: accept rate = 0.813, step_size = 2.60e-06, phase = sampling Iter 650/2200: accept rate = 0.818, step_size = 2.60e-06, phase = sampling Iter 700/2200: accept rate = 0.826, step_size = 2.60e-06, phase = sampling Iter 750/2200: accept rate = 0.832, step_size = 2.60e-06, phase = sampling Iter 800/2200: accept rate = 0.836, step_size = 2.60e-06, phase = sampling Iter 850/2200: accept rate = 0.840, step_size = 2.60e-06, phase = sampling Iter 900/2200: accept rate = 0.846, step_size = 2.60e-06, phase = sampling Iter 950/2200: accept rate = 0.854, step_size = 2.60e-06, phase = sampling Iter 1000/2200: accept rate = 0.857, step_size = 2.60e-06, phase = sampling Iter 1050/2200: accept rate = 0.861, step_size = 2.60e-06, 
phase = sampling Iter 1100/2200: accept rate = 0.863, step_size = 2.60e-06, phase = sampling Iter 1150/2200: accept rate = 0.867, step_size = 2.60e-06, phase = sampling Iter 1200/2200: accept rate = 0.873, step_size = 2.60e-06, phase = sampling Iter 1250/2200: accept rate = 0.874, step_size = 2.60e-06, phase = sampling Iter 1300/2200: accept rate = 0.875, step_size = 2.60e-06, phase = sampling Iter 1350/2200: accept rate = 0.876, step_size = 2.60e-06, phase = sampling Iter 1400/2200: accept rate = 0.878, step_size = 2.60e-06, phase = sampling Iter 1450/2200: accept rate = 0.879, step_size = 2.60e-06, phase = sampling Iter 1500/2200: accept rate = 0.881, step_size = 2.60e-06, phase = sampling Iter 1550/2200: accept rate = 0.884, step_size = 2.60e-06, phase = sampling Iter 1600/2200: accept rate = 0.886, step_size = 2.60e-06, phase = sampling Iter 1650/2200: accept rate = 0.888, step_size = 2.60e-06, phase = sampling Iter 1700/2200: accept rate = 0.889, step_size = 2.60e-06, phase = sampling Iter 1750/2200: accept rate = 0.889, step_size = 2.60e-06, phase = sampling Iter 1800/2200: accept rate = 0.891, step_size = 2.60e-06, phase = sampling Iter 1850/2200: accept rate = 0.893, step_size = 2.60e-06, phase = sampling Iter 1900/2200: accept rate = 0.895, step_size = 2.60e-06, phase = sampling Iter 1950/2200: accept rate = 0.895, step_size = 2.60e-06, phase = sampling Iter 2000/2200: accept rate = 0.896, step_size = 2.60e-06, phase = sampling Iter 2050/2200: accept rate = 0.898, step_size = 2.60e-06, phase = sampling Iter 2100/2200: accept rate = 0.900, step_size = 2.60e-06, phase = sampling Iter 2150/2200: accept rate = 0.901, step_size = 2.60e-06, phase = sampling Iter 2200/2200: accept rate = 0.902, step_size = 2.60e-06, phase = sampling ============================================================ Final Results: Acceptance rate: 0.902 (90.2%) Final step size: 2.60e-06 Collected 2000 samples ============================================================
In [8]:
# Evaluate and plot HMC-based uncertainty on the in-distribution test set.
# The model is reloaded before each uq call — presumably because the uq
# routines load posterior weight samples into the network and do not restore
# the original parameters; confirm against uq.uqevaluation.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std1, result1 = uq.uqevaluation(num_test, test_data, model, data, method='hmc', hmcsamples=hmcsamples.clone())
model = torch.load(model_save_file, weights_only=False)
model.to(device)
uq.plot_uq(num_test,test_data,model,data,method='hmc',hmcsamples=hmcsamples.clone())
Evaluating uncertainty on 200 test samples... Computing posterior predictions... ============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 0.017632 MAE: 0.011963 Mean Relative L2 Error: 0.59% Std Relative L2 Error: 0.09% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 98.5% (ideal: 68.3%) Coverage within 2σ: 99.8% (ideal: 95.4%) Coverage within 3σ: 100.0% (ideal: 99.7%) Status: OVER-CONFIDENT (uncertainties too small) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 0.002522 Mean Total σ: 0.050118 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 0.503 → Good! High uncertainty correlates with high error 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 0.5% Aleatoric fraction: 99.5% → Data noise dominates (model is confident) 6. PROPER SCORING RULES: Negative Log-Likelihood: -2.0141 (Lower is better) ============================================================ Computing HMC predictions for 5 visualization samples...
Monte-Carlo Dropout¶
In [9]:
import importlib
importlib.reload(uq)
# Prepare a freshly loaded copy of the trained model for MC dropout:
# inject dropout layers, then keep them active at inference time.
model = torch.load(model_save_file, weights_only=False)
uq.inject_dropout(model)
# PCANet overrides .train() with its own training loop, so the base-class
# nn.Module.train is called unbound to switch the module into training mode
# (which keeps dropout stochastic).
torch.nn.Module.train(model)
for layer in model.modules():
    if isinstance(layer, torch.nn.Dropout):
        torch.nn.Module.train(layer)
In [10]:
# MC-dropout evaluation on the in-distribution test set: the dropout layers
# activated in the previous cell make repeated forward passes stochastic.
std2, result2 = uq.uqevaluation(num_test, test_data, model, data, method='mcd')
uq.plot_uq(num_test, test_data, model, data, method='mcd')
Evaluating uncertainty on 200 test samples... ============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 0.033399 MAE: 0.022505 Mean Relative L2 Error: 1.06% Std Relative L2 Error: 0.35% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 100.0% (ideal: 68.3%) Coverage within 2σ: 100.0% (ideal: 95.4%) Coverage within 3σ: 100.0% (ideal: 99.7%) Status: OVER-CONFIDENT (uncertainties too small) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 0.151001 Mean Total σ: 0.163596 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 0.442 → Moderate correlation 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 77.6% Aleatoric fraction: 22.4% → Balanced uncertainty sources 6. PROPER SCORING RULES: Negative Log-Likelihood: -1.0255 (Lower is better) ============================================================
Laplace Approximation¶
In [11]:
# ----------------------------------------------------------------------
# Laplace approximation: Gaussian posterior N(theta_MAP, diag(1/H)) around
# the trained weights, using a diagonal Hessian approximation.
# ----------------------------------------------------------------------
# Reload the original model (without dropout) for Laplace approximation
model = torch.load(model_save_file, weights_only=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Laplace approximation settings
noise_std_laplace = 0.05 # Assumed observation noise (same as HMC/MC Dropout for fair comparison)
prior_std_laplace = 1.0 # Prior standard deviation for weights
print(f"\nLaplace Approximation Settings:")
print(f"  Assumed noise std: {noise_std_laplace}")
print(f"  Prior std: {prior_std_laplace}")
# Use a subset of training data for computing the Hessian (full dataset may be too expensive)
num_laplace_data = min(500, num_train)
laplace_indices = np.random.choice(num_train, num_laplace_data, replace=False)
x_laplace = train_data['X_train'][laplace_indices]
y_laplace = train_data['Y_train'][laplace_indices]
print(f"Using {num_laplace_data} training samples for Hessian computation")
print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")
print("\nComputing diagonal Hessian approximation...")
H_diag = uq.compute_diagonal_hessian(
    model, x_laplace, y_laplace,
    noise_std_laplace, prior_std_laplace, device, batch_size=10, sample_outputs_per_batch=30
)
# Posterior variance (diagonal approximation): var = 1 / H_diag.
# Clip it so a few near-zero Hessian entries cannot produce excessively
# large weight perturbations.
max_posterior_var = 0.01 ** 2  # maximum variance corresponds to std of 0.01
posterior_var = 1.0 / (H_diag + 1e-8)  # raw posterior variance
posterior_var = torch.clamp(posterior_var, max=max_posterior_var)
print(f"\nHessian diagonal statistics:")
print(f"  Mean H_diag: {H_diag.mean().item():.4e}")
print(f"  Max H_diag: {H_diag.max().item():.4e}")
print(f"  Min H_diag: {H_diag.min().item():.4e}")
print(f"  Mean posterior σ (clipped): {torch.sqrt(posterior_var).mean().item():.4e}")
print(f"  Max posterior σ (clipped): {torch.sqrt(posterior_var).max().item():.4e}")
# Clean up Hessian computation inputs.
del x_laplace, y_laplace
# Plain if-statement instead of a conditional expression used only for its
# side effect.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
# ============================================================
# Sample from the Laplace posterior: θ ~ N(θ_MAP, diag(1/H_diag))
# ============================================================
print("\nSampling from Laplace posterior...")
num_laplace_samples = 2000 # Number of posterior samples
theta_map = uq.pack_params(model).to(device)
# Compute posterior std once, reuse for every draw.
posterior_std_vec = torch.sqrt(posterior_var)
# Sample from the posterior and store on CPU to save GPU memory.
laplace_samples = []
for i in range(num_laplace_samples):
    # Sample: θ = θ_MAP + σ_posterior * ε, where ε ~ N(0, I)
    epsilon = torch.randn_like(theta_map)
    theta_sample = theta_map + posterior_std_vec * epsilon
    laplace_samples.append(theta_sample.cpu())
    del epsilon, theta_sample
laplace_samples = torch.stack(laplace_samples)
print(f"Generated {num_laplace_samples} posterior samples")
# Free memory from Hessian computation.
del H_diag, posterior_var, posterior_std_vec
if torch.cuda.is_available():
    torch.cuda.empty_cache()
Laplace Approximation Settings: Assumed noise std: 0.05 Prior std: 1.0 Using 500 training samples for Hessian computation Model has 175850 parameters Computing diagonal Hessian approximation...
Processed 100/500 samples Processed 200/500 samples Processed 300/500 samples Processed 400/500 samples Processed 500/500 samples Hessian diagonal statistics: Mean H_diag: 5.5446e+05 Max H_diag: 9.7607e+07 Min H_diag: 1.0000e+00 Mean posterior σ (clipped): 3.5005e-03 Max posterior σ (clipped): 1.0000e-02 Sampling from Laplace posterior... Generated 2000 posterior samples
In [12]:
# Evaluate and plot Laplace-approximation uncertainty on the test set,
# reloading the trained model before each call — presumably because the uq
# routines load the sampled weight vectors into the model; confirm against
# uq.uqevaluation.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std3, result3 = uq.uqevaluation(num_test, test_data, model, data, method='la', lasamples=laplace_samples)
model = torch.load(model_save_file, weights_only=False)
model.to(device)
uq.plot_uq(num_test, test_data, model, data, method='la', lasamples=laplace_samples)
Evaluating uncertainty on 200 test samples... ============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 0.029732 MAE: 0.019645 Mean Relative L2 Error: 0.95% Std Relative L2 Error: 0.27% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 93.2% (ideal: 68.3%) Coverage within 2σ: 99.0% (ideal: 95.4%) Coverage within 3σ: 99.8% (ideal: 99.7%) Status: OVER-CONFIDENT (uncertainties too small) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 0.005489 Mean Total σ: 0.050421 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 0.581 → Good! High uncertainty correlates with high error 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 1.6% Aleatoric fraction: 98.4% → Data noise dominates (model is confident) 6. PROPER SCORING RULES: Negative Log-Likelihood: -1.9035 (Lower is better) ============================================================
In [13]:
# Side-by-side metric table for the three UQ methods (HMC, MC dropout, Laplace)
# on the in-distribution test set.
uq.comparison_uq(result1,result2,result3)
====================================================================== COMPARISON: HMC vs MC Dropout vs Laplace Approximation ====================================================================== Metric HMC MC Dropout Laplace Ideal ------------------------------------------------------------------------------------- RMSE 0.017632 0.033399 0.029732 Lower MAE 0.011963 0.022505 0.019645 Lower Mean Rel. L2 Error (%) 0.59 1.06 0.95 Lower Coverage 1σ (%) 98.5 100.0 93.2 68.3 Coverage 2σ (%) 99.8 100.0 99.0 95.4 Coverage 3σ (%) 100.0 100.0 99.8 99.7 Mean Epistemic σ 0.002522 0.151001 0.005489 - Mean Total σ 0.050118 0.163596 0.050421 - Epistemic Fraction (%) 0.5 77.6 1.6 - Uncertainty-Error Corr. 0.503 0.442 0.581 Higher NLL -2.0141 -1.0255 -1.9035 Lower
Behaviour on OOD data¶
In [14]:
# Out-of-distribution dataset: same Poisson file layout, loaded with
# 100 train / 600 test samples. Only the test split is used below.
datas = DataProcessor(data_folder + data_prefix + '_samples_ood.npz', 100, 600, num_inp_fn_points, num_out_fn_points, num_Y_components, num_inp_red_dim, num_out_red_dim)
ood_data = {'X_train': datas.X_test, 'X_trunk': datas.X_trunk, 'Y_train': datas.Y_test}
In [15]:
# HMC uncertainty on the OOD set (passing 500 as the number of samples to
# evaluate; the printed output shows the routine caps this at 200).
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std4, result4 = uq.uqevaluation(500, ood_data, model, datas, method = 'hmc', hmcsamples=hmcsamples.clone())
Evaluating uncertainty on 200 test samples... Computing posterior predictions...
============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 10.186084 MAE: 5.552376 Mean Relative L2 Error: 765.33% Std Relative L2 Error: 929.47% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 17.1% (ideal: 68.3%) Coverage within 2σ: 26.2% (ideal: 95.4%) Coverage within 3σ: 32.9% (ideal: 99.7%) Status: UNDER-CONFIDENT (uncertainties too large) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 0.016884 Mean Total σ: 0.056963 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 0.978 → Good! High uncertainty correlates with high error 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 12.6% Aleatoric fraction: 87.4% → Data noise dominates (model is confident) 6. PROPER SCORING RULES: Negative Log-Likelihood: 10949.2194 (Lower is better) ============================================================
In [16]:
# MC-dropout uncertainty on the OOD set: reload the trained model, inject
# dropout, and keep the dropout layers stochastic via the unbound base-class
# nn.Module.train (PCANet's own .train() is a training loop).
model = torch.load(model_save_file, weights_only=False)
model.to(device)
uq.inject_dropout(model)
torch.nn.Module.train(model)
for layer in model.modules():
    if isinstance(layer, torch.nn.Dropout):
        torch.nn.Module.train(layer)
std5, result5 = uq.uqevaluation(500, ood_data, model, datas, 'mcd')
Evaluating uncertainty on 200 test samples... ============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 10.444515 MAE: 5.984565 Mean Relative L2 Error: 827.39% Std Relative L2 Error: 918.46% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 31.2% (ideal: 68.3%) Coverage within 2σ: 58.2% (ideal: 95.4%) Coverage within 3σ: 93.5% (ideal: 99.7%) Status: UNDER-CONFIDENT (uncertainties too large) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 2.658173 Mean Total σ: 2.666734 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 0.983 → Good! High uncertainty correlates with high error 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 84.0% Aleatoric fraction: 16.0% → Model uncertainty dominates (more data may help) 6. PROPER SCORING RULES: Negative Log-Likelihood: 2.4249 (Lower is better) ============================================================
In [17]:
# Laplace-approximation uncertainty on the OOD set.
model = torch.load(model_save_file, weights_only=False)
model.to(device)
std6, result6 = uq.uqevaluation(500, ood_data, model, datas, 'la', lasamples=laplace_samples.clone())
Evaluating uncertainty on 200 test samples... ============================================================ Uncertainty Quality Metrics ============================================================ 1. PREDICTION ACCURACY: RMSE: 10.313079 MAE: 5.781051 Mean Relative L2 Error: 800.17% Std Relative L2 Error: 921.98% 2. CALIBRATION (Coverage Analysis): Coverage within 1σ: 16.9% (ideal: 68.3%) Coverage within 2σ: 26.1% (ideal: 95.4%) Coverage within 3σ: 32.5% (ideal: 99.7%) Status: UNDER-CONFIDENT (uncertainties too large) 3. SHARPNESS (Uncertainty Magnitude): Mean Epistemic σ: 0.062018 Mean Total σ: 0.093024 Mean Aleatoric σ: 0.050000 (fixed) 4. UNCERTAINTY-ERROR CORRELATION: Pearson correlation: 1.000 → Good! High uncertainty correlates with high error 5. UNCERTAINTY DECOMPOSITION: Epistemic fraction: 34.4% Aleatoric fraction: 65.6% → Balanced uncertainty sources 6. PROPER SCORING RULES: Negative Log-Likelihood: 1611.6868 (Lower is better) ============================================================
In [18]:
# Side-by-side metric table for the three UQ methods on the OOD set.
uq.comparison_uq(result4, result5, result6)
====================================================================== COMPARISON: HMC vs MC Dropout vs Laplace Approximation ====================================================================== Metric HMC MC Dropout Laplace Ideal ------------------------------------------------------------------------------------- RMSE 10.186084 10.444515 10.313079 Lower MAE 5.552376 5.984565 5.781051 Lower Mean Rel. L2 Error (%) 765.33 827.39 800.17 Lower Coverage 1σ (%) 17.1 31.2 16.9 68.3 Coverage 2σ (%) 26.2 58.2 26.1 95.4 Coverage 3σ (%) 32.9 93.5 32.5 99.7 Mean Epistemic σ 0.016884 2.658173 0.062018 - Mean Total σ 0.056963 2.666734 0.093024 - Epistemic Fraction (%) 12.6 84.0 34.4 - Uncertainty-Error Corr. 0.978 0.983 1.000 Higher NLL 10949.2194 2.4249 1611.6868 Lower
OOD data detection¶
In [23]:
# OOD detection: use each method's normalized mean predictive std as a score.
# Scores for the ID test set (std1..std3) are concatenated with the OOD
# scores (std4..std6), then scaled by the maximum.
ood_scores = []
for id_std, ood_std in ((std1, std4), (std2, std5), (std3, std6)):
    score = np.concatenate((id_std.mean(axis=1), ood_std.mean(axis=1)), axis=0)  # epistemic + aleatoric
    ood_scores.append(score / np.max(score))
hmc_ood_eval, mcd_ood_eval, la_ood_eval = ood_scores
# True labels for AUROC: 0 for ID, 1 for OOD.
oods = np.concatenate((np.zeros(std1.shape[0]), np.ones(std4.shape[0])), axis=0)
# For each UQ method: compute AUROC and plot the ROC curve.
for ood, uqmethod in zip([hmc_ood_eval, mcd_ood_eval, la_ood_eval], ['HMC', 'MC Dropout', 'Laplace Approximation']):
    auroc = roc_auc_score(oods, ood)
    fpr, tpr, _ = roc_curve(oods, ood)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUROC = {auroc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic (ROC) using {uqmethod}')
    plt.legend(loc='lower right')
    plt.show()
    print(f"AUROC for OOD detection: {auroc}")
AUROC for OOD detection: 0.7480249999999999
AUROC for OOD detection: 0.801425
AUROC for OOD detection: 0.75305